library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v readr 1.3.1
## v tibble 1.4.2 v purrr 0.2.5
## v tidyr 0.8.2 v stringr 1.3.1
## v ggplot2 3.1.0 v forcats 0.4.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(cluster)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Load the domestic T20 career bowling ratings into a data frame.
# The location is an absolute path on the author's machine, so it must be
# adjusted before the script is run anywhere else.
domestict20Bowling <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Ratings\\domestict20careerbowlingrating_mod.csv",
  header = TRUE
)

# Preview the first six rows.
# NOTE(review): the printed Best_Bowling values (e.g. "06-Jul", "May-19") look
# like bowling figures that a spreadsheet converted to dates -- confirm against
# the raw data before using that column.
head(domestict20Bowling, n = 6L)
## Name Balls Maidens Runs Wickets Average X4_Wicket_Hauls
## 1 S L Malinga 4835 13 5393 299 18.03 8
## 2 S P Narine 4112 15 3856 227 16.98 10
## 3 D J J Bravo 5781 2 7772 317 24.51 7
## 4 Shahid Afridi 4650 9 5177 236 21.93 4
## 5 A C Thomas 4558 15 5739 263 21.82 4
## 6 D P Nannes 4624 9 5719 257 22.25 7
## X5_Wicket_Hauls Strike_Rate Economy Rating ScaledRating LogRating
## 1 4 16.17 6.69 11985.215 20.93 4.078646
## 2 1 18.11 5.62 9781.489 19.89 3.990405
## 3 1 18.23 8.06 9276.494 19.63 3.967384
## 4 2 19.70 6.68 7491.174 18.61 3.874550
## 5 1 17.33 7.55 7276.598 18.47 3.861928
## 6 2 17.99 7.42 7198.086 18.42 3.857217
## Best_Bowling
## 1 06-Jul
## 2 May-19
## 3 May-23
## 4 05-Jul
## 5 May-24
## 6 May-31
# Print a per-column summary of the loaded bowling data.
summary(domestict20Bowling)
## Name Balls Maidens Runs
## S Sharma : 2 Min. : 1 Min. : 0.00 Min. : 0.0
## Yuvraj Singh : 2 1st Qu.: 202 1st Qu.: 0.00 1st Qu.: 252.5
## A A Chavan : 1 Median : 689 Median : 1.00 Median : 874.0
## A A Jhunjhunwala: 1 Mean :1094 Mean : 2.03 Mean :1353.2
## A A Kazi : 1 3rd Qu.:1636 3rd Qu.: 3.00 3rd Qu.:2008.5
## A A Noffke : 1 Max. :5781 Max. :19.00 Max. :7772.0
## (Other) :387
## Wickets Average X4_Wicket_Hauls X5_Wicket_Hauls
## Min. : 0.00 Min. : 0.00 Min. : 0.0000 Min. :0.0000
## 1st Qu.: 9.00 1st Qu.: 21.61 1st Qu.: 0.0000 1st Qu.:0.0000
## Median : 36.00 Median : 24.60 Median : 0.0000 Median :0.0000
## Mean : 55.12 Mean : 25.42 Mean : 0.9392 Mean :0.2329
## 3rd Qu.: 81.50 3rd Qu.: 29.23 3rd Qu.: 1.0000 3rd Qu.:0.0000
## Max. :317.00 Max. :162.00 Max. :10.0000 Max. :4.0000
##
## Strike_Rate Economy Rating ScaledRating
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 17.02 1st Qu.: 7.175 1st Qu.: 8.067 1st Qu.: 3.370
## Median : 19.75 Median : 7.660 Median : 140.498 Median : 6.890
## Mean : 19.78 Mean : 7.849 Mean : 740.900 Mean : 7.173
## 3rd Qu.: 22.59 3rd Qu.: 8.170 3rd Qu.: 709.812 3rd Qu.:10.325
## Max. :114.00 Max. :24.000 Max. :11985.215 Max. :20.930
##
## LogRating Best_Bowling
## Min. :-1.3802 Mar-18 : 10
## 1st Qu.: 0.9064 Apr-15 : 9
## Median : 2.1477 Mar-13 : 9
## Mean : 1.8447 Apr-13 : 8
## 3rd Qu.: 2.8511 Apr-14 : 8
## Max. : 4.0786 04-Oct : 7
## (Other):344
# Partition the bowlers into five groups with k-means, using columns 2 to 13
# of the data frame (Balls through LogRating) as features. The seed is fixed
# because kmeans() chooses its starting centres at random.
# NOTE(review): the features are clustered on their raw scales, so the
# large-magnitude columns (e.g. Runs, Balls, Rating) probably dominate the
# distance; consider scale() before clustering -- confirm intent.
set.seed(20)
domesticBowlCluster <- kmeans(domestict20Bowling[, 2:13], centers = 5)

# Keep the assignment as a factor so it maps to a discrete colour scale.
domesticBowlCluster$cluster <- as.factor(domesticBowlCluster$cluster)

# Economy vs Wickets, coloured by cluster. Legend labels are derived from the
# factor levels so there is always exactly one label per cluster; k-means
# cluster ids are arbitrary, so they are shown as neutral "Cluster <id>" names
# rather than quality rankings.
ggplot(
  domestict20Bowling,
  aes(Economy, Wickets, color = domesticBowlCluster$cluster)
) +
  geom_point(size = 2) +
  scale_color_hue(
    name = "Cluster",
    labels = paste("Cluster", levels(domesticBowlCluster$cluster))
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Domestic T20 Bowling Economy vs Wickets")

library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive version of the Economy vs Wickets scatter plot: markers are
# coloured by the k-means cluster assignment and the hover text shows the
# player's name. The title describes what is actually plotted (bowling
# economy against wickets).
p <- plot_ly(
  domestict20Bowling,
  x = ~Economy,
  y = ~Wickets,
  type = "scatter",
  mode = "markers",
  color = domesticBowlCluster$cluster,
  text = ~paste("Name: ", Name)
) %>%
  layout(title = "Domestic T20 Bowling Economy vs Wickets by Cluster")

# Display the interactive plot.
p